#!/bin/bash
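# Fine-tune a masked language model (BERT architecture, initialized from a
# UniLM base uncased checkpoint) on the WikiAnswers corpus via
# run_language_modeling.py.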
export TRAIN_FILE=/data/paraphrase/wikianswers/all_text.txt
export CUDA_VISIBLE_DEVICES=0,1,2,3
export MODEL_PATH=/data/title_generation/models/unilm_base_uncased_v1.2/pytorch_model.bin
export OUTPUT_DIR=/data/paraphrase/models/wiki_lm
export CACHE_DIR=/data/paraphrase/models/cache_lm
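
# Optional sanity check (a minimal sketch; assumes the paths above exist on
# the local filesystem): fail fast if the training corpus or the initial
# checkpoint is missing before launching a multi-GPU run.
for f in "$TRAIN_FILE" "$MODEL_PATH"; do
    if [ ! -e "$f" ]; then
        echo "Missing required file: $f" >&2
        exit 1
    fi
done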

# Masked-LM training (--mlm), treating each line of the corpus as one example
# (--line_by_line); a checkpoint is saved every 30000 steps.
python run_language_modeling.py \
    --output_dir "$OUTPUT_DIR" --overwrite_output_dir \
    --model_type bert \
    --model_name_or_path "$MODEL_PATH" --tokenizer_name bert-base-uncased --config_name bert-base-uncased \
    --do_train --cache_dir "$CACHE_DIR" --per_device_train_batch_size 32 --block_size 64 \
    --train_data_file "$TRAIN_FILE" \
    --mlm --line_by_line --save_steps 30000
